Packages
#install.packages("haven")
library(haven)
library(readxl)
library(ggplot2)
#install.packages("moments")
library(moments)
#install.packages("e1071") # e1071::kurtosis() returns excess kurtosis; note that its default estimator is type = 3, whereas the formula SPSS reports corresponds to type = 2
library(e1071)
#install.packages("plotly") #for fun interactive plots!
library(plotly)
Read in data
# Read the cuteness dataset from the Excel workbook into a data frame.
# NOTE(review): this is an absolute path on one machine; it must be adjusted
# before the script can run anywhere else.
CutenessData <- readxl::read_excel(
  path = "/users/uyenchu/Downloads/YEAR 4/Regression/Cutenessdata.xlsx"
)
Assumption Checking
Normality of Outcome Variable
Histogram
# Histogram of the outcome variable (cuteness), with the colours edited for
# easier viewing and a dashed vertical line marking the sample mean.
#   * bins = 30 is ggplot2's default, stated explicitly so stat_bin() does not
#     emit its "using `bins = 30`" message on every run.
#   * The mean is passed as a fixed xintercept (outside aes()) so a single
#     reference line is drawn instead of one overlapping line per data row.
hist1 <- ggplot(CutenessData, aes(x = cuteness)) +
  geom_histogram(bins = 30, fill = "purple", color = "black") +
  geom_vline(
    xintercept = mean(CutenessData$cuteness),
    color = "#000000",
    linetype = "dashed"
  )
# Print the plot object; assigning it alone never renders the histogram.
hist1
Skewness and Kurtosis
# Shape statistics for the outcome variable.
# Both `moments` and `e1071` are attached and both export kurtosis() and
# skewness(); the calls are namespace-qualified so the result no longer depends
# on which package happened to be attached last (moments::kurtosis() is NOT
# excess kurtosis).
# type = 3 is e1071's default and is the estimator that produced the values
# recorded below.
# NOTE(review): per the e1071 documentation, SAS/SPSS use type = 2; switch the
# type (and refresh the recorded output) if exact SPSS parity is required.
e1071::kurtosis(CutenessData$cuteness, type = 3) # excess kurtosis
## [1] 0.09250901
e1071::skewness(CutenessData$cuteness, type = 3)
## [1] -0.5656087
Outliers
Histogram
# Histograms used to eyeball outliers in the outcome (cuteness) and the
# predictor (catlove). Each has a dashed vertical line at the sample mean.
#   * bins = 30 is ggplot2's default, stated explicitly to silence the
#     stat_bin() message.
#   * The mean is supplied as a fixed xintercept so only one line is drawn.
hist1 <- ggplot(CutenessData, aes(x = cuteness)) +
  geom_histogram(bins = 30, fill = "purple", color = "black") +
  geom_vline(
    xintercept = mean(CutenessData$cuteness),
    color = "#000000",
    linetype = "dashed"
  )
hist2 <- ggplot(CutenessData, aes(x = catlove)) +
  geom_histogram(bins = 30, fill = "lightgreen", color = "black") +
  geom_vline(
    xintercept = mean(CutenessData$catlove),
    color = "#000000",
    linetype = "dashed"
  )
# Print both plot objects; without this neither histogram is ever rendered.
hist1
hist2
Z_Scores
# Standardise cuteness: subtract the sample mean and divide by the sample
# standard deviation, then tabulate how often each distinct z-score occurs.
zscoreCuteness <- with(
  CutenessData,
  (cuteness - mean(cuteness)) / sd(cuteness)
)
table(zscoreCuteness)
## zscoreCuteness
## -3.34215861173202 -2.77707437237283 -2.49453225269323 -2.42389672277333
## 2 6 1 2
## -2.07071907317384 -1.92944801333404 -1.71754142357434 -1.64690589365444
## 5 1 5 5
## -1.36436377397485 -1.08182165429525 -1.01118612437535 -0.799279534615656
## 33 4 14 8
## -0.658008474775858 -0.51673741493606 -0.304830825176362 -0.234195295256464
## 33 11 35 14
## 0.0483468244231329 0.330888944102729 0.401524474022628 0.472160003942527
## 70 11 26 3
## 0.613431063782325 0.754702123622124 0.895973183461922 1.10787977322162
## 10 42 11 27
## 1.17851530314152 1.46105742282111
## 5 47
Box Plots
# Box plot of the outcome variable (cuteness) for spotting outlying values.
boxplot1 <- ggplot(data = CutenessData, mapping = aes(y = cuteness)) +
  geom_boxplot()
print(boxplot1)

# Box plot of the predictor variable (catlove) for spotting outlying values.
boxplot2 <- ggplot(data = CutenessData, mapping = aes(y = catlove)) +
  geom_boxplot()
print(boxplot2)

Linearity
Scatterplot
# Scatterplot of cuteness (y) against catlove (x) for a visual linearity check.
scatterplot1 <- ggplot(
  data = CutenessData,
  mapping = aes(x = catlove, y = cuteness)
) +
  geom_point(color = "black", size = 2)
# Convert the static ggplot into an interactive plotly widget.
ggplotly(p = scatterplot1)
Correlation
# Pearson product-moment correlation between the outcome and the predictor.
cor.test(
  x = CutenessData$cuteness,
  y = CutenessData$catlove,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: CutenessData$cuteness and CutenessData$catlove
## t = 4.2118, df = 429, p-value = 3.089e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1068249 0.2882998
## sample estimates:
## cor
## 0.1992702
Regression
# Simple linear regression: cuteness predicted from catlove.
lmcuteness <- lm(formula = cuteness ~ catlove, data = CutenessData)
# Model summary: coefficients with their tests, R-squared, and the overall
# F test with its significance.
summary(object = lmcuteness)
##
## Call:
## lm(formula = cuteness ~ catlove, data = CutenessData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.51745 -0.38160 0.03449 0.53449 1.26240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.50970 0.11334 22.144 < 2e-16 ***
## catlove 0.15194 0.03607 4.212 3.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6945 on 429 degrees of freedom
## Multiple R-squared: 0.03971, Adjusted R-squared: 0.03747
## F-statistic: 17.74 on 1 and 429 DF, p-value: 3.089e-05
# ANOVA table for the fitted model: sums of squares and mean squares for the
# predictor and the residuals, plus the F test.
anova(object = lmcuteness)
## Analysis of Variance Table
##
## Response: cuteness
## Df Sum Sq Mean Sq F value Pr(>F)
## catlove 1 8.556 8.5555 17.739 3.089e-05 ***
## Residuals 429 206.902 0.4823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1